import pandas as pd

# Load the raw (not yet cleaned) dataset from disk.
file_path = 'Data/NotClean_EVUsage_Data.csv'
data = pd.read_csv(file_path)

# Preview the first rows of the data exactly as loaded.
print("初始数据:", data.head(), sep="\n")

# Report the number of missing values per column, followed by a separator line.
print("缺失值情况:", data.isna().sum(), sep="\n")
print('*' * 50)

# Discard columns in which every value is missing.
data = data.dropna(axis=1, how='all')

# Fill the remaining gaps: the two duration columns get a zero-length
# '0:00:00' string, the energy / savings / fee columns get 0.
zero_duration_columns = ['Total Duration (hh:mm:ss)', 'Charging Time (hh:mm:ss)']
zero_filled_columns = ['Energy (kWh)', 'GHG Savings (kg)', 'Gasoline Savings (gallons)', 'Fee']
fill_defaults = {
    **dict.fromkeys(zero_duration_columns, '0:00:00'),
    **dict.fromkeys(zero_filled_columns, 0),
}
data = data.fillna(value=fill_defaults)

# Parse the timestamp columns into pandas datetime values.
for timestamp_column in ('Start Date', 'End Date', 'Transaction Date (Pacific Time)'):
    data[timestamp_column] = pd.to_datetime(data[timestamp_column])

# Parse the duration columns into pandas timedelta values.
for duration_column in ('Total Duration (hh:mm:ss)', 'Charging Time (hh:mm:ss)'):
    data[duration_column] = pd.to_timedelta(data[duration_column])

# Remove the time-zone columns, which are not needed in the cleaned output.
# errors='ignore' keeps this step from raising KeyError when a column is
# already absent, e.g. because it was entirely empty and was removed by the
# earlier "drop all-missing columns" step.
data.drop(columns=['Start Time Zone', 'End Time Zone'], inplace=True, errors='ignore')

# Remove rows that are exact duplicates of an earlier row.
data.drop_duplicates(inplace=True)

# Print the dtype of every column after cleaning so the conversions can be checked.
print("处理后的数据类型:", data.dtypes, sep="\n")

# Write the cleaned table to a new CSV file, leaving out the row index.
cleaned_file_path = 'Data/Cleaned_EVUsage_Data.csv'
data.to_csv(path_or_buf=cleaned_file_path, index=False)

# Tell the user where the cleaned file was written.
print(f"清洗后的数据已保存到: {cleaned_file_path}")
